{
 "metadata": {
  "name": "",
  "signature": "sha256:a9eebd34b149c3c030b4256d857ef5171f5a0b76224da80d6620b3050ae86364"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Financial and Economic Data Applications"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import division\n",
      "from pandas import Series, DataFrame\n",
      "import pandas as pd\n",
      "from numpy.random import randn\n",
      "import numpy as np\n",
      "pd.options.display.max_rows = 12\n",
      "np.set_printoptions(precision=4, suppress=True)\n",
      "import matplotlib.pyplot as plt\n",
      "plt.rc('figure', figsize=(12, 6))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%pwd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%cd ../book_scripts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Data munging topics"
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Time series and cross-section alignment"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "close_px = pd.read_csv('ch11/stock_px.csv', parse_dates=True, index_col=0)\n",
      "volume = pd.read_csv('ch11/volume.csv', parse_dates=True, index_col=0)\n",
      "prices = close_px.ix['2011-09-05':'2011-09-14', ['AAPL', 'JNJ', 'SPX', 'XOM']]\n",
      "volume = volume.ix['2011-09-05':'2011-09-12', ['AAPL', 'JNJ', 'XOM']]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "prices"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "volume"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "prices * volume"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "vwap = (prices * volume).sum() / volume.sum()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "vwap"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "vwap.dropna()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "prices.align(volume, join='inner')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "s1 = Series(range(3), index=['a', 'b', 'c'])\n",
      "s2 = Series(range(4), index=['d', 'b', 'c', 'e'])\n",
      "s3 = Series(range(3), index=['f', 'a', 'c'])\n",
      "DataFrame({'one': s1, 'two': s2, 'three': s3})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "DataFrame({'one': s1, 'two': s2, 'three': s3}, index=list('face'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Operations with time series of different frequencies"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts1 = Series(np.random.randn(3),\n",
      "             index=pd.date_range('2012-6-13', periods=3, freq='W-WED'))\n",
      "ts1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts1.resample('B')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts1.resample('B', fill_method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',\n",
      "                          '2012-6-21', '2012-6-22', '2012-6-29'])\n",
      "ts2 = Series(np.random.randn(6), index=dates)\n",
      "ts2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts1.reindex(ts2.index, method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts2 + ts1.reindex(ts2.index, method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Using periods instead of timestamps"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],\n",
      "             index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))\n",
      "infl = Series([0.025, 0.045, 0.037, 0.04],\n",
      "              index=pd.period_range('1982', periods=4, freq='A-DEC'))\n",
      "gdp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "infl"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "infl_q = infl.asfreq('Q-SEP', how='end')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "infl_q"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "infl_q.reindex(gdp.index, method='ffill')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Time of day and \"as of\" data selection"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Make an intraday date range and time series\n",
      "rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')\n",
      "# Make a 5-day series of 9:30-15:59 values\n",
      "rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])\n",
      "ts = Series(np.arange(len(rng), dtype=float), index=rng)\n",
      "ts"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from datetime import time\n",
      "ts[time(10, 0)]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.at_time(time(10, 0))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ts.between_time(time(10, 0), time(10, 1))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.random.seed(12346)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Set most of the time series randomly to NA\n",
      "indexer = np.sort(np.random.permutation(len(ts))[700:])\n",
      "irr_ts = ts.copy()\n",
      "irr_ts[indexer] = np.nan\n",
      "irr_ts['2012-06-01 09:50':'2012-06-01 10:00']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')\n",
      "irr_ts.asof(selection)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Splicing together data sources"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data1 = DataFrame(np.ones((6, 3), dtype=float),\n",
      "                  columns=['a', 'b', 'c'],\n",
      "                  index=pd.date_range('6/12/2012', periods=6))\n",
      "data2 = DataFrame(np.ones((6, 3), dtype=float) * 2,\n",
      "                  columns=['a', 'b', 'c'],\n",
      "                  index=pd.date_range('6/13/2012', periods=6))\n",
      "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n",
      "spliced"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data2 = DataFrame(np.ones((6, 4), dtype=float) * 2,\n",
      "                  columns=['a', 'b', 'c', 'd'],\n",
      "                  index=pd.date_range('6/13/2012', periods=6))\n",
      "spliced = pd.concat([data1.ix[:'2012-06-14'], data2.ix['2012-06-15':]])\n",
      "spliced"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "spliced_filled = spliced.combine_first(data2)\n",
      "spliced_filled"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "spliced.update(data2, overwrite=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "spliced"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cp_spliced = spliced.copy()\n",
      "cp_spliced[['a', 'c']] = data1[['a', 'c']]\n",
      "cp_spliced"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Return indexes and cumulative returns"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas.io.data as web\n",
      "price = web.get_data_yahoo('AAPL', '2011-01-01')['Adj Close']\n",
      "price[-5:]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "price['2011-10-03'] / price['2011-3-01'] - 1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "returns = price.pct_change()\n",
      "ret_index = (1 + returns).cumprod()\n",
      "ret_index[0] = 1  # Set first value to 1\n",
      "ret_index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "m_returns = ret_index.resample('BM', how='last').pct_change()\n",
      "m_returns['2012']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "m_rets = (1 + returns).resample('M', how='prod', kind='period') - 1\n",
      "m_rets['2012']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "returns[dividend_dates] += dividend_pcts"
     ]
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Group transforms and analysis"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.options.display.max_rows = 100\n",
      "pd.options.display.max_columns = 10\n",
      "np.random.seed(12345)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import random; random.seed(0)\n",
      "import string\n",
      "\n",
      "N = 1000\n",
      "def rands(n):\n",
      "    choices = string.ascii_uppercase\n",
      "    return ''.join([random.choice(choices) for _ in xrange(n)])\n",
      "tickers = np.array([rands(5) for _ in xrange(N)])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "M = 500\n",
      "df = DataFrame({'Momentum' : np.random.randn(M) / 200 + 0.03,\n",
      "                'Value' : np.random.randn(M) / 200 + 0.08,\n",
      "                'ShortInterest' : np.random.randn(M) / 200 - 0.02},\n",
      "                index=tickers[:M])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ind_names = np.array(['FINANCIAL', 'TECH'])\n",
      "sampler = np.random.randint(0, len(ind_names), N)\n",
      "industries = Series(ind_names[sampler], index=tickers,\n",
      "                    name='industry')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "by_industry = df.groupby(industries)\n",
      "by_industry.mean()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "by_industry.describe()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Within-Industry Standardize\n",
      "def zscore(group):\n",
      "    return (group - group.mean()) / group.std()\n",
      "\n",
      "df_stand = by_industry.apply(zscore)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df_stand.groupby(industries).agg(['mean', 'std'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Within-industry rank descending\n",
      "ind_rank = by_industry.rank(ascending=False)\n",
      "ind_rank.groupby(industries).agg(['min', 'max'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Industry rank and standardize\n",
      "by_industry.apply(lambda x: zscore(x.rank()))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Group factor exposures"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from numpy.random import rand\n",
      "fac1, fac2, fac3 = np.random.rand(3, 1000)\n",
      "\n",
      "ticker_subset = tickers.take(np.random.permutation(N)[:1000])\n",
      "\n",
      "# Weighted sum of factors plus noise\n",
      "port = Series(0.7 * fac1 - 1.2 * fac2 + 0.3 * fac3 + rand(1000),\n",
      "              index=ticker_subset)\n",
      "factors = DataFrame({'f1': fac1, 'f2': fac2, 'f3': fac3},\n",
      "                    index=ticker_subset)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "factors.corrwith(port)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.ols(y=port, x=factors).beta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def beta_exposure(chunk, factors=None):\n",
      "    return pd.ols(y=chunk, x=factors).beta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "by_ind = port.groupby(industries)\n",
      "exposures = by_ind.apply(beta_exposure, factors=factors)\n",
      "exposures.unstack()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Decile and quartile analysis"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas.io.data as web\n",
      "data = web.get_data_yahoo('SPY', '2006-01-01')\n",
      "data.info()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "px = data['Adj Close']\n",
      "returns = px.pct_change()\n",
      "\n",
      "def to_index(rets):\n",
      "    index = (1 + rets).cumprod()\n",
      "    first_loc = max(index.index.get_loc(index.idxmax()) - 1, 0)\n",
      "    index.values[first_loc] = 1\n",
      "    return index\n",
      "\n",
      "def trend_signal(rets, lookback, lag):\n",
      "    signal = pd.rolling_sum(rets, lookback, min_periods=lookback - 5)\n",
      "    return signal.shift(lag)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "signal = trend_signal(returns, 100, 3)\n",
      "trade_friday = signal.resample('W-FRI').resample('B', fill_method='ffill')\n",
      "trade_rets = trade_friday.shift(1) * returns\n",
      "trade_rets = trade_rets[:len(returns)]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "to_index(trade_rets).plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "vol = pd.rolling_std(returns, 250, min_periods=200) * np.sqrt(250)\n",
      "\n",
      "def sharpe(rets, ann=250):\n",
      "    return rets.mean() / rets.std()  * np.sqrt(ann)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cats = pd.qcut(vol, 4)\n",
      "print('cats: %d, trade_rets: %d, vol: %d' % (len(cats), len(trade_rets), len(vol)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "trade_rets.groupby(cats).agg(sharpe)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "More example applications"
     ]
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Signal frontier analysis"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "names = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC', 'C']\n",
      "def get_px(stock, start, end):\n",
      "    return web.get_data_yahoo(stock, start, end)['Adj Close']\n",
      "px = DataFrame({n: get_px(n, None, None) for n in names})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "px = pd.read_pickle('notebooks/stock_prices')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.close('all')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "px = px.asfreq('B').fillna(method='pad')\n",
      "rets = px.pct_change()\n",
      "((1 + rets).cumprod() - 1).plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def calc_mom(price, lookback, lag):\n",
      "    mom_ret = price.shift(lag).pct_change(lookback)\n",
      "    ranks = mom_ret.rank(axis=1, ascending=False)\n",
      "    demeaned = ranks.subtract(ranks.mean(axis=1), axis=0)\n",
      "    return demeaned.divide(demeaned.std(axis=1), axis=0)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "compound = lambda x : (1 + x).prod() - 1\n",
      "daily_sr = lambda x: x.mean() / x.std()\n",
      "\n",
      "def strat_sr(prices, lb, hold):\n",
      "    # Compute portfolio weights\n",
      "    freq = '%dB' % hold\n",
      "    port = calc_mom(prices, lb, lag=1)\n",
      "\n",
      "    daily_rets = prices.pct_change()\n",
      "\n",
      "    # Compute portfolio returns\n",
      "    port = port.shift(1).resample(freq, how='first')\n",
      "    returns = daily_rets.resample(freq, how=compound)\n",
      "    port_rets = (port * returns).sum(axis=1)\n",
      "\n",
      "    return daily_sr(port_rets) * np.sqrt(252 / hold)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "strat_sr(px, 70, 30)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from collections import defaultdict\n",
      "\n",
      "lookbacks = range(20, 90, 5)\n",
      "holdings = range(20, 90, 5)\n",
      "dd = defaultdict(dict)\n",
      "for lb in lookbacks:\n",
      "    for hold in holdings:\n",
      "        dd[lb][hold] = strat_sr(px, lb, hold)\n",
      "\n",
      "ddf = DataFrame(dd)\n",
      "ddf.index.name = 'Holding Period'\n",
      "ddf.columns.name = 'Lookback Period'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "\n",
      "def heatmap(df, cmap=plt.cm.gray_r):\n",
      "    fig = plt.figure()\n",
      "    ax = fig.add_subplot(111)\n",
      "    axim = ax.imshow(df.values, cmap=cmap, interpolation='nearest')\n",
      "    ax.set_xlabel(df.columns.name)\n",
      "    ax.set_xticks(np.arange(len(df.columns)))\n",
      "    ax.set_xticklabels(list(df.columns))\n",
      "    ax.set_ylabel(df.index.name)\n",
      "    ax.set_yticks(np.arange(len(df.index)))\n",
      "    ax.set_yticklabels(list(df.index))\n",
      "    plt.colorbar(axim)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "heatmap(ddf)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Future contract rolling"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.options.display.max_rows = 10"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas.io.data as web\n",
      "# Approximate price of S&P 500 index\n",
      "px = web.get_data_yahoo('SPY')['Adj Close'] * 10\n",
      "px"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from datetime import datetime\n",
      "expiry = {'ESU2': datetime(2012, 9, 21),\n",
      "          'ESZ2': datetime(2012, 12, 21)}\n",
      "expiry = Series(expiry).order()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "expiry"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "np.random.seed(12347)\n",
      "N = 200\n",
      "walk = (np.random.randint(0, 200, size=N) - 100) * 0.25\n",
      "perturb = (np.random.randint(0, 20, size=N) - 10) * 0.25\n",
      "walk = walk.cumsum()\n",
      "\n",
      "rng = pd.date_range(px.index[0], periods=len(px) + N, freq='B')\n",
      "near = np.concatenate([px.values, px.values[-1] + walk])\n",
      "far = np.concatenate([px.values, px.values[-1] + walk + perturb])\n",
      "prices = DataFrame({'ESU2': near, 'ESZ2': far}, index=rng)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "prices.tail()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def get_roll_weights(start, expiry, items, roll_periods=5):\n",
      "    # start : first date to compute weighting DataFrame\n",
      "    # expiry : Series of ticker -> expiration dates\n",
      "    # items : sequence of contract names\n",
      "\n",
      "    dates = pd.date_range(start, expiry[-1], freq='B')\n",
      "    weights = DataFrame(np.zeros((len(dates), len(items))),\n",
      "                        index=dates, columns=items)\n",
      "\n",
      "    prev_date = weights.index[0]\n",
      "    for i, (item, ex_date) in enumerate(expiry.iteritems()):\n",
      "        if i < len(expiry) - 1:\n",
      "            weights.ix[prev_date:ex_date - pd.offsets.BDay(), item] = 1\n",
      "            roll_rng = pd.date_range(end=ex_date - pd.offsets.BDay(),\n",
      "                                     periods=roll_periods + 1, freq='B')\n",
      "\n",
      "            decay_weights = np.linspace(0, 1, roll_periods + 1)\n",
      "            weights.ix[roll_rng, item] = 1 - decay_weights\n",
      "            weights.ix[roll_rng, expiry.index[i + 1]] = decay_weights\n",
      "        else:\n",
      "            weights.ix[prev_date:, item] = 1\n",
      "\n",
      "        prev_date = ex_date\n",
      "\n",
      "    return weights"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "weights = get_roll_weights('6/1/2012', expiry, prices.columns)\n",
      "weights.ix['2012-09-12':'2012-09-21']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "rolled_returns = (prices.pct_change() * weights).sum(1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Rolling correlation and linear regression"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "aapl = web.get_data_yahoo('AAPL', '2000-01-01')['Adj Close']\n",
      "msft = web.get_data_yahoo('MSFT', '2000-01-01')['Adj Close']\n",
      "\n",
      "aapl_rets = aapl.pct_change()\n",
      "msft_rets = msft.pct_change()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.figure()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.rolling_corr(aapl_rets, msft_rets, 250).plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.figure()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model = pd.ols(y=aapl_rets, x={'MSFT': msft_rets}, window=250)\n",
      "model.beta"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "model.beta['MSFT'].plot()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}